%needs optimization, statistic and ML toolboxes.

clear; clc;

close all; 

%% 1) Import factors

allfactors = csvread('../1data/factors.csv',1,0);

date       = allfactors(:,1);       %dates
 
rf         = allfactors(:,2);       %risk-free rates

factors    = allfactors(:,3:end);   %150 factors from Feng-Giglio-Xiu (2020) 

clear allfactors
%% 2) Factors' names and other info

summary    = readtable('../1data/summary.csv');   %Contains year of pub, paper, etc.

factorname = summary.Row;                         %Acronym for the factor 

factorname_full ...
           = summary.Descpription;                %Factor's Full Description 
       
%% 3) Competing Factor Models Fama-French 3 factors + 15 factors

index_short_factors ...
           = 1:1:150;            %Index of all factors used (we use the 150 but could look at a smaller subset)
       
factorname_short...
           = factorname(index_short_factors,1);
       
factors_short ...
           = factors(:,index_short_factors);

L          = length(date);

P          = size(factors_short,2);

%% 4) Portfolios and covariates

port_5x5   = csvread('../1data/port_5x5.csv',0,1);  %2,875 (5x5 bivariate) test portfolios from Feng-Giglio-Xiu (2020) replication materials

port_5x5   = port_5x5 - rf*ones(1,size(port_5x5,2)); %monthly excess return

X_2875          = zeros(size(port_5x5,2),P+1);

%Asset-by-asset time-series regression 
for i_port = 1:size(port_5x5,2)
   
    X_2875_aux ...
           = regress(port_5x5(:,i_port),[ones(L,1),factors_short]);
    
    X_2875(i_port,:) = [1,X_2875_aux(2:end,1)'];
    
    clear X_2875_aux
    
end
%% 5) Hyperparameters for Bayesian cross-sectional regressions 

addpath('../3functions')

Y_2875       = 100*12*mean(port_5x5,1)'; %Annualized average return of the portfolio

%Pick a_0, b_0 to maximize the marginal likelihood corresponding to the
%largest model 

f            = @(b0) hyperparameters_objective(b0,Y_2875,X_2875);

[b0,fval]    = fmincon(f,1,-1,0);        %Pick b0 to maximize marginal likelihood

a0           = a0_fun(b0,Y_2875,X_2875); %Pick optimal a0 as a function of b0

%% 6) Sample sizes and covariates for model competition  

sample_sizes ...
             = [25,2875];                %25 ports, 2875, etc.
         
n_sample_sizes ...
             = size(sample_sizes,2);
         
loss         = zeros(n_sample_sizes,3); %posterior loss for each sample size and model 

fit          = zeros(n_sample_sizes,3); %model fit

uncertainty  = zeros(n_sample_sizes,3); %model uncertainty
  
for i_sample_sizes = 1: n_sample_sizes

%Competing Model 1: Fama-French 
FF_factors ...
             = [find(strcmp(factorname_full,'Excess Market Return')),...
                find(strcmp(factorname_full,'Small Minus Big')),...
                find(strcmp(factorname_full,'High Minus Low'))];
                
X_FF         = X_2875(1:sample_sizes(1,i_sample_sizes),[1,FF_factors+1]);

[loss(i_sample_sizes,1),...
 fit(i_sample_sizes,1),...
 uncertainty(i_sample_sizes,1)] ...
             = ridge_loss(Y_2875(1:sample_sizes(1,i_sample_sizes)),...
                          X_FF,...
                          a0,b0,...
                          trace(X_FF'*X_FF./sample_sizes(1,i_sample_sizes))/size(X_FF,2));

clear X_FF 

%Competing Model 2: Giglio Selection
FGX_factors ...
             = [find(strcmp(factorname_full,'Quality Minus Junk')),...
                find(strcmp(factorname_full,'Robust Minus Weak')),...
                find(strcmp(factorname_full,'HXZ Investment')),...
                find(strcmp(factorname_full,'HXZ Profitability')),...
                find(strcmp(factorname_full,'Intermediary Risk Factor'))];
            
X_G          = X_2875(1:sample_sizes(1,i_sample_sizes),[1:136,FGX_factors+1]);

[loss(i_sample_sizes,2),...
 fit(i_sample_sizes,2),...
 uncertainty(i_sample_sizes,2)] ...
             = ridge_loss(Y_2875(1:sample_sizes(1,i_sample_sizes)),...
                          X_G,...
                          a0,b0,...
                          trace(X_G'*X_G./sample_sizes(1,i_sample_sizes))/size(X_G,2));

clear X_G

%Competing Model 3: All
X_all        = X_2875(1:sample_sizes(1,i_sample_sizes),:);

[loss(i_sample_sizes,3),...
 fit(i_sample_sizes,3),...
 uncertainty(i_sample_sizes,3)] ...
             = ridge_loss(Y_2875(1:sample_sizes(1,i_sample_sizes)),...
                          X_2875(1:sample_sizes(1,i_sample_sizes),:),...
                          a0,b0,...
                          trace(X_all'*X_all./sample_sizes(1,i_sample_sizes))/size(X_all,2));
         
end

%% 6) Figures

aux2 = max(loss,[],2); 

figure(5)

bar(100*loss./aux2) 

legend({'Fama-French 3 factors','FGX-DS 139 factors)','Factor zoo 150 Factors'})

legend boxoff

grid on

xticks(1:1:2)

xticklabels({'25','2,875'})

xlabel('Number of test portfolios')

ylabel('% Loss relative to worst competing model')

print(gcf,'-depsc2','../4figures/CompetingFactorModels.eps')

%% 

%{ 
include_5x5 = find(port_5x5_id.min_stk>=kk)';
port_5x5b = [];

for i = 1:P
    if ismember(i,include_5x5)
        port_5x5b = [port_5x5b port_5x5(:,(i*25-24):(i*25))];
    end
end

%}